library(data.table)
library(R.utils)
library(stringr)
library(ggcorrplot)
theme_set(theme_bw())
fnm1 = "GSE120575_Sade_Feldman_melanoma_single_cells_nolabel_1.txt.gz"
fnm2 = "GSE120575_Sade_Feldman_melanoma_single_cells_2.txt.gz"
sf_tpm1 = fread(paste0("../../scRNAseq/Sade_Feldman_2018/", fnm1),
fill = TRUE, header = TRUE)
sf_tpm2 = fread(paste0("../../scRNAseq/Sade_Feldman_2018/", fnm2),
fill = TRUE, drop = 16293, col.names = colnames(sf_tpm1))
ls_tpm = list(sf_tpm1,sf_tpm2)
rm(sf_tpm1)
rm(sf_tpm2)
sf_tpm = rbindlist(ls_tpm)
rm(ls_tpm)
dim(sf_tpm) # 55737 x 16292
## [1] 55737 16292
sf_tpm[1:2,1:4]
gene.names = sf_tpm$V1
rownames(sf_tpm) = gene.names
sf_tpm$V1 = NULL
Only select classified cells.
cell_type = read.table("./output/cell_type.txt", header = TRUE,
sep = "\t", as.is = TRUE)
dim(cell_type)
## [1] 16291 4
cell_type[1:2,]
table(cell_type$type, useNA="ifany")
##
## B_cells CD4T_memory CD8T_B
## 1427 1099 2322
## CD8T_G Dendritic_cells Monocytes_Macrophages
## 3036 281 1330
## NK1 NK2 NK3
## 456 1961 1096
## Plasma_cells Tregs unclustered
## 284 1252 1747
clustered_cells = cell_type$cell[cell_type$type != "unclustered"]
cluster_sf_tpm = sf_tpm[, ..clustered_cells]
rownames(cluster_sf_tpm) = gene.names
dim(cluster_sf_tpm)
## [1] 55737 14544
cluster_sf_tpm[1:3,1:3]
rm(sf_tpm)
rm(clustered_cells)
Calculate the proportion of cells that each gene is expressed in
cellprop.per.gene = rowMeans(cluster_sf_tpm > 0)
table(cellprop.per.gene > 0.02)
##
## FALSE TRUE
## 41139 14598
he.genes = gene.names[cellprop.per.gene > 0.02]
HEgenes_cluster_tpm = cluster_sf_tpm[cellprop.per.gene > 0.02, ]
rownames(HEgenes_cluster_tpm) = he.genes
dim(HEgenes_cluster_tpm)
## [1] 14598 14544
rm(cluster_sf_tpm)
rm(cellprop.per.gene)
sig_gene = fread("./output/SF2018_sig_gene_matrix.txt")
dim(sig_gene)
## [1] 594 12
sig_gene[1:2,]
sig_mtrx = data.matrix(sig_gene[, -1])
dim(sig_mtrx)
## [1] 594 11
sig_mtrx[1:2,]
## B_cells CD4T_memory CD8T_B CD8T_G Dendritic_cells
## [1,] 0.04348984 0.1219290 0.7313351 0.3241238 0.008256228
## [2,] 0.02385424 0.2537671 2.1449182 0.4139888 0.079608541
## Monocytes_Macrophages NK1 NK2 NK3 Plasma_cells Tregs
## [1,] 0.05795489 0.6226316 0.2564559 0.7853011 0.08524648 0.4924681
## [2,] 0.35542857 2.4847149 0.5287404 1.8980383 0.59714789 3.4090974
cr1 = cor(sig_mtrx)
ggcorrplot(cr1, tl.cex = 6)
round(cr1[c(2:4,7:9),c(2:4,7:9)], 2)
## CD4T_memory CD8T_B CD8T_G NK1 NK2 NK3
## CD4T_memory 1.00 0.23 0.55 0.18 0.66 0.40
## CD8T_B 0.23 1.00 0.82 0.57 0.67 0.89
## CD8T_G 0.55 0.82 1.00 0.31 0.84 0.79
## NK1 0.18 0.57 0.31 1.00 0.35 0.50
## NK2 0.66 0.67 0.84 0.35 1.00 0.84
## NK3 0.40 0.89 0.79 0.50 0.84 1.00
sig_info = fread("./output/SF2018_sig_gene_details.txt")
dim(sig_info)
## [1] 14598 56
sig_info[1:2,c(1:4,(ncol(sig_info)-3):ncol(sig_info)), with=F]
cluster_ct = cell_type$type[match(names(HEgenes_cluster_tpm), cell_type$cell)]
marker_col = grep("^marker_", names(sig_info), value = TRUE)
u_col = grep("^u_", names(sig_info), value = TRUE)
fc_col = grep("^fc_", names(sig_info), value = TRUE)
plot1 <- function(gene1, edata, cluster_ct, sig_info){
y = as.numeric(edata[which(rownames(edata) == gene1),])
df1 = data.frame(expression = y, cell_type = cluster_ct)
g1 = ggplot(df1, aes(x=cell_type, y=expression, fill=cell_type)) +
geom_boxplot() + coord_flip() + theme(legend.position = "none") +
ggtitle(gene1)
print(g1)
ct = marker_col[unlist(sig_info[gene==gene1, marker_col, with=FALSE])==1]
print(ct)
sig_info[gene==gene1, c("gene", u_col), with=F]
sig_info[gene==gene1, c("gene", fc_col), with=F]
}
plot1("FOXP3", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_Tregs"
plot1("GZMA", HEgenes_cluster_tpm, cluster_ct, sig_info)
## character(0)
plot1("PDCD1", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_CD8T_B"
plot1("CTLA4", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_Tregs"
plot1("LAG3", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_CD8T_B"
plot1("CD3E", HEgenes_cluster_tpm, cluster_ct, sig_info)
## character(0)
plot1("CD4", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_Tregs"
plot1("CD8A", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_CD8T_B" "marker_CD8T_G"
plot1("CD33", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_Monocytes_Macrophages"
plot1("CD14", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_Monocytes_Macrophages"
plot1("CD19", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_B_cells"
plot1("LMNA", HEgenes_cluster_tpm, cluster_ct, sig_info)
## character(0)
plot1("FASLG", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_CD8T_B" "marker_NK3"
plot1("VCAM1", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_CD8T_B" "marker_NK3"
plot1("LEF1", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_CD4T_memory"
sig_info$gene[sig_info$marker_NK1 == 1]
## [1] "ANLN" "ASF1B" "ASPM" "AURKB"
## [5] "BIRC5" "BUB1" "BUB1B" "CASC5"
## [9] "CCNA2" "CCNB2" "CDC20" "CDC45"
## [13] "CDCA2" "CDCA3" "CDCA5" "CDCA8"
## [17] "CDK1" "CDKN3" "CIT" "CKAP2L"
## [21] "DEPDC1B" "DHFRP1" "DLGAP5" "DTL"
## [25] "E2F2" "ESCO2" "GTSE1" "HIST1H3B"
## [29] "HIST1H3G" "HJURP" "KIAA0101" "KIF11"
## [33] "KIF15" "KIF18B" "KIF23" "KIF2C"
## [37] "KIFC1" "MELK" "MKI67" "MLF1IP"
## [41] "NCAPG" "NUF2" "PBK" "PKMYT1"
## [45] "PLK1" "POLQ" "RAD51" "RP11-424C20.2"
## [49] "RRM2" "SKA1" "SPC24" "SPC25"
## [53] "TK1" "TOP2A" "TPX2" "TYMS"
## [57] "UBE2C" "UHRF1" "ZWINT"
plot1("CCNA2", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_CD8T_B" "marker_NK1"
plot1("CCNB2", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK1"
plot1("KIF11", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK1"
sig_info$gene[sig_info$marker_NK2 == 1]
## [1] "ADRB2" "AE000661.37" "B3GNT7" "BIN2P1"
## [5] "C1orf21" "CA5B" "CDC34" "COLQ"
## [9] "CX3CR1" "EPHA4" "FGFBP2" "GNLY"
## [13] "GOLGA8I" "IL18RAP" "KIR2DL3" "KIR2DS4"
## [17] "KIR3DL1" "KIR3DL2" "KLRB1" "KLRC1"
## [21] "KLRF1" "LGR6" "LINC00299" "MATK"
## [25] "MMP25" "MPP7" "MYBL1" "NCAM1"
## [29] "NCR1" "NMUR1" "OSBPL5" "P2RY8"
## [33] "PDGFD" "PDZD8" "PITPNC1" "PLEKHG3"
## [37] "PRKX" "PRKY" "PRSS23" "PTGDR"
## [41] "PZP" "RAP1GAP2" "RP11-473M20.7" "RP11-53I6.2"
## [45] "S1PR5" "SEMA4C" "SH2D1B" "SOCS2"
## [49] "SPON2" "TGFBR3" "TMIGD2" "TNFSF14"
## [53] "TRDC" "TRGV7" "TRGV9" "TSPAN32"
## [57] "TXK" "YES1" "ZBTB16"
plot1("FGFBP2", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK2"
plot1("KIR2DL3", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK2"
plot1("KLRC1", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK2"
plot1("NCR1", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK2" "marker_NK3"
plot1("GNLY", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK2"
plot1("TRDC", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK2" "marker_NK3"
plot1("TRGV7", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK2"
plot1("TRGV9", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK2" "marker_NK3"
sig_info$gene[sig_info$marker_NK3 == 1]
## [1] "AC006460.2" "AC069363.1" "AC092580.4" "ACTBP12"
## [5] "AE000661.37" "AFAP1L2" "B3GAT1" "CADM1"
## [9] "CAV1" "CD200R1" "CHI3L2" "CPNE7"
## [13] "CRTAM" "CTD-2313F11.1" "CXCR6" "DAPK2"
## [17] "DBN1" "DTHD1" "DTX3" "ENC1"
## [21] "F2R" "FAM179A" "FASLG" "FXYD2"
## [25] "GNG4" "GZMK" "HAVCR1" "HAVCR2"
## [29] "IKZF2" "JAKMIP1" "KIAA1671" "KIR2DL4"
## [33] "KIR3DL2" "LINC00484" "LINC00539" "NCR1"
## [37] "NDFIP2" "RDH10" "RP11-222K16.2" "RP11-277P12.20"
## [41] "RP11-305L7.1" "RP11-4F5.2" "RTKN2" "SLC27A2"
## [45] "TIMD4" "TMEM155" "TRDC" "TRDV1"
## [49] "TRGC1" "TRGC2" "TRGV2" "TRGV4"
## [53] "TRGV5" "TRGV8" "TRGV9" "TSC22D1"
## [57] "TTC24" "VCAM1" "ZNF80"
plot1("GZMK", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK3"
plot1("HAVCR1", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK3"
plot1("KIR2DL4", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_CD8T_B" "marker_NK3"
plot1("TRGC1", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK3"
plot1("TRGC2", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK3"
plot1("TRGV2", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK3"
plot1("TRGV4", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK3"
plot1("TRGV5", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK3"
plot1("TRGV8", HEgenes_cluster_tpm, cluster_ct, sig_info)
## [1] "marker_NK3"
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 1095531 58.6 3236734 172.9 NA 3236734 172.9
## Vcells 305852484 2333.5 1090273317 8318.2 32768 2129439458 16246.4
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggcorrplot_0.1.3 ggplot2_3.3.1 stringr_1.4.0 R.utils_2.9.2
## [5] R.oo_1.23.0 R.methodsS3_1.8.0 data.table_1.12.8
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.3 plyr_1.8.5 compiler_3.6.2 pillar_1.4.3
## [5] tools_3.6.2 digest_0.6.23 jsonlite_1.6.1 evaluate_0.14
## [9] lifecycle_0.2.0 tibble_3.0.1 gtable_0.3.0 pkgconfig_2.0.3
## [13] rlang_0.4.6 yaml_2.2.1 xfun_0.12 withr_2.1.2
## [17] dplyr_0.8.4 knitr_1.28 vctrs_0.3.0 grid_3.6.2
## [21] tidyselect_1.0.0 glue_1.3.1 R6_2.4.1 rmarkdown_2.1
## [25] farver_2.0.3 reshape2_1.4.3 purrr_0.3.3 magrittr_1.5
## [29] scales_1.1.0 htmltools_0.4.0 ellipsis_0.3.0 assertthat_0.2.1
## [33] colorspace_1.4-1 labeling_0.3 stringi_1.4.5 munsell_0.5.0
## [37] crayon_1.3.4